library(tidyverse)
library(lubridate)

# Load the raw data: the Goodreads CSV export stored under the local
# Google Drive volume.
goodreads_raw <- read_csv(
  file = "/Volumes/GoogleDrive/My Drive/data_stuff/R/goodreads/gr_04_20_22.csv"
)
# Tidy the raw export: keep and rename the columns used downstream, clean the
# ISBN13 field, and split the "Last, First" author string into separate
# lower-cased first/last name columns.
goodreads <- goodreads_raw %>%
  transmute(
    book_id = `Book Id`,
    title = `Title`,
    author = `Author l-f`,
    add_authors = `Additional Authors`,
    # Strip a leading =" and a trailing " from ISBN13, then turn empty
    # strings into NA.
    isbn13 = ISBN13 %>%
      str_remove("^=\"") %>%
      str_remove("\"$") %>%
      na_if(""),
    my_rating = `My Rating`,
    avg_rating = `Average Rating`,
    n_pages = `Number of Pages`,
    publication_y = `Original Publication Year`,
    date_read = `Date Read`,
    date_added = `Date Added`,
    # Factor levels follow the order in which shelves first appear in the file.
    shelf = factor(`Exclusive Shelf`, levels = unique(`Exclusive Shelf`)),
    review = `My Review`,
    # Carried along only so it can be split by separate() below.
    `Author l-f`
  ) %>%
  separate(
    `Author l-f`,
    # The column is "l-f" (last, first), so the piece before the comma is the
    # last name.
    into = c("author_last", "author_first"),
    sep = ",",
    # Keep everything after the first comma together instead of dropping it
    # with a warning; authors without a comma get NA as first name.
    extra = "merge",
    fill = "right"
  ) %>%
  # Assign the results back to the columns; unnamed mutate() expressions would
  # only add new columns named after the expression text.
  mutate(
    author_first = tolower(str_trim(author_first)),
    author_last = tolower(str_trim(author_last))
  ) %>%
  # Preserve the first-then-last column order.
  relocate(author_first, .before = author_last)

# Transform read data: keep books that have a finish date, order them
# chronologically, and treat each book's start date as the finish date of the
# previously finished book.
gr_read <- goodreads %>%
  filter(!is.na(date_read)) %>%
  arrange(date_read) %>%
  transmute(
    rn = row_number(),
    book_id,
    title,
    author_first,
    author_last,
    n_pages,
    # lag() is taken on the sorted rows, so the first book has no start date.
    date_start = ymd(lag(date_read)),
    date_read = ymd(date_read),
    year_read = year(date_read),
    # Whole days between the two dates, converted numerically so that gaps of
    # 1000+ days are not truncated by string slicing.
    duration_days = as.integer(
      as.numeric(difftime(date_read, date_start, units = "days"))
    ),
    # The first book (NA duration) and books finished on the same day as the
    # previous one (0 days) are both counted as a one-day read.
    duration_days = if_else(
      is.na(duration_days) | duration_days == 0L,
      1L,
      duration_days
    )
  )
# Per-year summary: mean read duration in days and number of books finished.
year_gr_read <- summarize(
  group_by(gr_read, year_read),
  mean = mean(duration_days, na.rm = TRUE),
  count = n()
)
library(plotly)

# Plot to-do list:
# - DONE: hover text shows the book, start date, and end date
# - average duration in days by year
# - color by genre

# Timeline chart: one horizontal line per finished book, running from its
# start date to its finish date at height rn (the book's position in reading
# order). Each book is its own trace so it carries its own hover text.
p <- plot_ly()
# seq_len() covers every row (including the most recent book) and yields an
# empty loop when gr_read has no rows.
for (i in seq_len(nrow(gr_read))) {
  p <- add_trace(
    p,
    x = c(gr_read$date_start[i], gr_read$date_read[i]),
    y = c(gr_read$rn[i], gr_read$rn[i]),
    mode = "lines",
    type = "scatter",
    line = list(width = 10),
    # Ideas not yet enabled:
    # facet_col = 'year_read',
    # line = list(color = df$color[i], width = 20),
    showlegend = FALSE,
    hoverinfo = "text",
    text = paste(
      "Book: ", gr_read$title[i], "<br>",
      "Start Date: ", gr_read$date_start[i], "<br>",
      "End Date: ", gr_read$date_read[i], "<br>",
      "Duration: ", gr_read$duration_days[i], " Days"
    )
  )
}

# Add the chart title and y-axis label (zero line hidden), then print the
# plot so it renders.
p <- layout(
  p,
  title = "Books Read by Date and Read Time",
  yaxis = list(
    title = "Cumulative Read",
    zeroline = FALSE
  )
)
p

# To test: scratch snippet that draws a single book (row 4). Kept commented
# out; it is not part of the analysis.
#
# plot_ly() %>%
#   # for(i in 2:(nrow(gr_read) - 1)){
#   add_trace(p,
#             x = c(gr_read$date_start[4], gr_read$date_read[4]),
#             y = c(gr_read$rn[4], gr_read$rn[4]),
#             mode = "lines",
#             type = 'scatter',
#             line = list(width = 10),
#             facet_col = year_read,
#             showlegend = F,
#             hoverinfo = "text",
#             text = paste("Book:", gr_read$title[4], "<br>",
#                          "Start Date: ", gr_read$date_start[4], "<br>",
#                          "End Date:", gr_read$date_read[4], "<br>",
#                          "Duration: ", gr_read$duration_days[4], ' Days')
#             # evaluate = T
#   )